clear all
capture log close
set mem 32m
set more off, permanently	
set scheme s1color, perm


********************************************************************************
********              Consumer search: pre vs post-MTU                 *********
********************************************************************************
		
		
************************************************************************************
****        Produce Figure 3: price notifications patterns, pre-MPD            *****
************************************************************************************

// Upload data: consumer-reported price notifications pre-MTU 
//	Load Mehrtanken Data
	foreach i in 2012_09 2012_10 2012_11 2012_12 2013_01 2013_02 2013_03 2013_04 2013_05 2013_06 2013_07 2013_08 2013_09 {
		clear
		insheet using "$raw\01_Mehr_Tanken\stations_`i'.csv"
		compress
		tempfile stations_`i'
		save `stations_`i'', replace
		}

	use `stations_2012_09', clear
	foreach i in 2012_10 2012_11 2012_12 2013_01 2013_02 2013_03 2013_04 2013_05 2013_06 2013_07 2013_08 2013_09 {
		append using `stations_`i''
		}
	
//	Rename data
	
	rename v1 id_data
	rename v2 fuel_type
	rename v3 price
	rename v4 received

//	Trim data id data
	
	replace id_data = ltrim(id_data)
	
//	Keep if e5, e10 or diesel

	keep if fuel_type == 1 | fuel_type == 2 | fuel_type == 4
	
	replace fuel_type = 3 if fuel_type == 4		// 1 == e10; 2 == e5; 3 == diesel


// 	Date and time
	
	gen received_date = substr(received,1,10)									
	gen date = date(received_date,"YMD")			
	format date  %td
	drop received_date
	
	gen double clock = clock(received,"YMDhms")									//	We convert the string date and time information in received into a stata clock format variable
	format clock  %tc

	gen received_time = substr(received,11,19)
	gen double time = clock(received_time,"hms")								//	We convert the string time information in received_time into a stata time format variable
	format time  %tc
	drop received_time	
	
//	Generate month and year variable

	gen month = month(date)
	gen year = year(date)

//	Generate hour and datetime information
	
	gen hour = hh(clock)
	order id date clock time hour
	
	gen double datehour=cofd(date)
	format datehour %tc
	replace datehour = datehour + hour *1000*60*60

		
//	Save data
		compress
		save "$dta\mehrtanken_pre_MTU", replace
		
		


// Prepare data: notifications reported same time, same minute are assumed to be simultaneous 

	use "$dta\mehrtanken_pre_MTU", clear		
		
* Check range of fuel prices
	sum price, d
	count if price==0															// 216 zero price notifications, 213 reported simultaneously for all fuel types on the same day by the same station
	count if price>2															// 1,319 price notifications, majority reported simultaneously for all fuel types on the same day by the same station
	
*	Gen a dummy for price notification for E5 (price2), E10 (price1) and diesel (price3)
	separate price, by(fuel_type)
	
	foreach v in 1 2 3{
		replace price`v' = 1 if price`v' != .
		replace price`v' = 0 if price`v' == .
	}

* Gen minutes for time of the price reporting (assumption: prices reported same day, same hour, same minute are reported simultaneously)
	gen minute = mm(clock)
	
* Collapse
	gcollapse (sum) price1 price2 price3, by(id_data date hour minute)
	
* Save intermediate data
	save "$dta\interm_preMTU", replace
	

	
	
// Prepare consumer reported price notifications data pre-MTU: alternative count 
	
* Load data 	
	use "$dta\interm_preMTU", clear
		
* Gen dummy for simultaneous reporting of prices for three fuel types 	
	gen all = price1==1 & price2==1 & price3==1
	
* Label all instances of simultaneous price reporting [multiple simultaneous E5, E10 and diesel price reports are dropped]
	gen var1= price1>0 & price2>0 & price3>0 & all==0			
	
	egen min_nr = rowmin(price1 price2 price3) if var1==1
	replace min_nr = 0 if var1==0
	
* Gen total number of cases when prices are reported simultaneously
	egen nr_1 = total(all)
	egen nr_2 = total(min_nr)
	gen simult_nr = nr_1 + nr_2													// 19,467,080 instances when E5, E10 and diesel prices are reported simultaneously for a given station
	
* Drop all instances of simultaneous price notifications	
	replace price1 = price1 - min_nr if var1==1
	replace price2 = price2 - min_nr if var1==1
	replace price3 = price3 - min_nr if var1==1
	
	drop if all==1
	
* Collapse by date 
	gcollapse simult_nr (sum) price1 price2 price3, by(date)
	
* Gen total number of non-simultaneous price notifications
	egen tot_price1 = total(price1)
	egen tot_price2 = total(price2)
	egen tot_price3 = total(price3)
	gen nonsimult_nr = tot_price1 + tot_price2 + tot_price3
	
* Compute a share of non-simultaneous price reports among all price reports 
	gen total_1 = nonsimult_nr + simult_nr
	display nonsimult_nr / total_1												// 16.3 % of price reports are non-simultaneous
	
* Input yearly stock of registered gasoline and diesel passenger vehicles for 2012 and 2013 
* Source: Verkehr in Zahlen 2015/2016 ("Kraftfahrzeugverkehr - Bestand an Kraftfahrzeugen und Kraftfahrzeuganhängern" Table). Accessed at https://www.bmvi.de/SharedDocs/DE/Publikationen/G/verkehr-in-zahlen_2015-pdf.pdf?__blob=publicationFile 		
	gen vehicle_nr_petrol = 31037000 											// stock of gasoline registered passenger vehicles in 2012 (Germany)
	gen vehicle_nr_diesel = 11891000 											// stock of diesel registered passenger vehicles in 2012 (Germany)	
	
	replace vehicle_nr_petrol = 30852000 if date>=d(01jan2013)					// stock in 2013
	replace vehicle_nr_diesel = 12579000 if date>=d(01jan2013)
	
	label var vehicle_nr_petrol "Yearly stock of gasoline registered passenger vehicles, DE"
	label var vehicle_nr_diesel "Yearly stock of diesel registered passenger vehicles, DE"
	
* 	Adjust number of reported price notifications pre-MTU to 2012/2013 stock of 1,000 gasoline or diesel passenger vehicles
	gen search_petrol = price1 + price2
	gen search_petrol_pv = (search_petrol / vehicle_nr_petrol)*1000
	gen search_diesel_pv = (price3 / vehicle_nr_diesel)*1000	
	
* Gen moving averages
	gen one=1
	tsset one date
	tssmooth ma ma_search_petrol_pv = search_petrol_pv, window(2 1 2)
	tssmooth ma ma_search_diesel_pv = search_diesel_pv, window(2 1 2)
	
	
//	Produce Figure 3: price notifications patterns, pre-MPD
	
* Plot smoothed number of price notifications per 1,000 vehicles pre-MTU by fuel type, when only non-simultaneous price notifications are counted
	twoway (line ma_search_diesel_pv date if date<d(31aug2013)) (line ma_search_petrol_pv date if date<d(31aug2013), lp(dash)) , ///
		ytitle("Price notifications per 1,000 vehicles", size(medium)) ///
		legend(lab(1 "Diesel") lab(2 "Gasoline") rows(1) rowgap(*.2) symx(*1) size(medsmall)) ///
		xtitle("") xlabel( `=td(01sep2012)' "1 Sep 2012" `=td(01jan2013)' "1 Jan 2013" `=td(01may2013)' "1 May 2013" `=td(01aug2013)' "1 Aug 2013", labs(medsmall)) ///
		ylabel(0(.1)0.5, grid labs(medsmall) gmax gmin) bgcolor(white) ysize(7) xsize(15) scale(1.2) 	
		
	graph export "$output\Figure_3_price_notifications.pdf", replace	
	
	
	
* PDF Format for JPE Micro
twoway (line ma_search_diesel_pv date if date<d(31aug2013) , lcolor(black)) (line ma_search_petrol_pv date if date<d(31aug2013), lp(dash) lcolor(gs0)) , ///
    ytitle("Price notifications per 1,000 vehicles") ///
    legend(lab(1 "Diesel") lab(2 "Gasoline") rows(1) rowgap(*.2) symx(*1) ) ///
    xtitle("") xlabel( `=td(01sep2012)' "1 Sep 2012" `=td(01jan2013)' "1 Jan 2013" `=td(01may2013)' "1 May 2013" `=td(01aug2013)' "1 Aug 2013", ) ///
    ylabel(0(.1)0.5, grid  gmax gmin) bgcolor(white) xsize(9) ysize(6) scale(1)

graph export "$output\Figure_3_price_notifications_bw.pdf", replace	
		
		
* Gen daily percentage difference in number of price notifications per 1,000 vehicles b/w diesel and petrol
	gen diff = (search_diesel_pv-search_petrol_pv)/search_petrol_pv
	
* Gen avg diff in number of non-simultaneously reported price notifications between diesel and gasoline pre-MTU (stations obliged to report prices starting from 31 August 2013, see https://www.bundeskartellamt.de/EN/Economicsectors/MineralOil/MTU-Fuels/mtufuels_node.html)
	egen avg_diff = mean(diff) if diff!=. & date<d(31aug2013)
	tab avg_diff
	* -> number of diesel price notifications is on average 64.41 % higher than the number of gasoline price notifications.	
	

	
// Erase data 
erase "$dta\mehrtanken_pre_MTU.dta"		
erase "$dta\interm_preMTU.dta"
				
				
	
*************************************************************************
****       Gen Figures 4: consumer search post-MTU         *****
*************************************************************************	

// Prepare consumer search data post-MTU 

* Load data on consumer search for 2015 (data available for the selected months below)
	foreach v in 01 02 03 04 05 10 11 12 {
		** Import
			import delimited "$raw\10_Consumer_search\2015-`v'.csv", clear
	
		** Clean variable names
			rename date time_stamp
			
		** Clean date variable
			gen tempd = substr(time_stamp, 1, 10)
			gen date = date(tempd, "YMD")
			format date %td
			drop tempd
			
		** Clean time variable
			gen tempt = substr(time_stamp, 12, 8)
			gen time = clock(tempt, "hms")
			format time %tcHH:MM:SS
			drop tempt
			drop time_stamp
			
		** Save locally
			tempfile search_`v'
			save `search_`v'' , replace
}
			
** Clear dta
	clear
	
** Append data on consumer search for 2015
	foreach i in 01 02 03 04 05 10 11 12 {
		append using `search_`i''
}
	
** Drop unnecessary variables
	drop station stations v14 platform
	
** Order
	order date time
	
** Clean variables	
	tab gas
	replace gas="e5" if gas=="    e5" | gas=="       e5"
	replace gas="diesel" if gas=="   diesel"
	replace gas="e10" if gas=="   e10" | gas=="      e10"
	replace gas="all" if gas=="   all" | gas=="      all"

** Save
	save "$dta\consumer_search", replace
	
	
	
// Gen Figure 4: unique daily price searchers by fuel type, post-MPD (Germany) 

* Load data 
	use "$dta\consumer_search", clear

* Search number by fuel type	
	tab gas																		// ca. 24.22 % of searches for 'all'/'undefined'
	bysort client: tab gas														// all browser searches do not distinguish among fuel types
	
* Drop when searched fuel type is all/undefined 
	drop if gas=="all"|gas=="undefined"
	
* Number of distinct users per fuel type
	bysort gas: distinct callerid 
	
* Combine E5 & E10
	replace gas="petrol" if gas=="e5"|gas=="e10"
	
* Gen daily distinct number of users searching for diesel vs petrol price	
	keep date callerid gas
	gen one=1
	duplicates drop
	gcollapse (sum) one, by(date gas)
	rename one user_nr
	
* Input number (stock) of registered petrol and diesel passenger vehicles, as of 2015 (Verkehr in Zahlen 2015/16)
	gen vehicle_nr = 30542000 if gas=="petrol"							
	replace vehicle_nr = 13861000 if gas=="diesel"		

* Gen daily number of distinct caller ids per 1,000 vehicles in circulation, by fuel type
	gen user_nr_pv = (user_nr*1000) / vehicle_nr

	encode gas, gen(gas_type)
	tsset gas_type date
	tsfill, full
	
* Replace to missing observations with downward dips 
	sum user_nr_pv, d
	replace user_nr_pv = . if user_nr_pv<.01										// 5 days	
	
* Gen Figure
	twoway (line user_nr_pv date if  gas_type==1, cmissing(n)) ///
	(line user_nr_pv date if gas_type==2, lp(dash) cmissing(n)), ///
		ytitle("Number of distinct users per 1,000 vehicles", size(medium)) ///
		legend(lab(1 "Diesel") lab(2 "Gasoline") rows(1) rowgap(*.2) symx(*1) size(medsmall)) ///
		xtitle("") xlabel( `=td(02jan2015)' "2 Jan 2015"  `=td(01mar2015)' "1 Mar 2015" `=td(01may2015)' "1 May 2015" `=td(01oct2015)' "1 Oct 2015" `=td(15nov2015)' "15 Nov 2015", labs(medsmall)) ///
		ylabel(0(.01)0.07, grid labs(medsmall) gmax gmin) bgcolor(white) ysize(7) xsize(15) scale(1.2)  
		
	graph export "$output\Figure_4_number_distinct_users_color.pdf", replace	
	
* PDF Format for JPE Micro
	twoway (line user_nr_pv date if  gas_type==1, cmissing(n) lcolor(black)) ///
	(line user_nr_pv date if gas_type==2, lp(dash) cmissing(n) lcolor(gs0)), ///
		ytitle("Number of distinct users per 1,000 vehicles") ///
		legend(lab(1 "Diesel") lab(2 "Gasoline") rows(1) rowgap(*.2) symx(*1)) ///
		xtitle("") xlabel( `=td(02jan2015)' "2 Jan 2015"  `=td(01mar2015)' "1 Mar 2015" `=td(01may2015)' "1 May 2015" `=td(01oct2015)' "1 Oct 2015" `=td(15nov2015)' "15 Nov 2015") ///
		ylabel(0(.01)0.07, grid gmax gmin) bgcolor(white) ysize(6) xsize(9) scale(1)  

graph export "$output\Figure_4_number_distinct_users_bw.pdf", replace	
	

	
	


	
// Gen Figure B7: average daily search number per user by fuel type, post-MPD (Germany)

* Load data 
	use "$dta\consumer_search", clear

* Search number by fuel type	
	tab gas																		
	bysort client: tab gas															
	
* Drop when searched fuel type is all/undefined 
	drop if gas=="all"|gas=="undefined"
	
* Collapse
	gen one=1
	gcollapse (sum) one, by(callerid gas date) 
	
* Compute avg number of searches per user for diesel / E5 / E10
	gcollapse (mean) one, by(gas date)
	rename one search_nr
	
	encode gas, gen(gas_type)
	tsset gas_type date
	tsfill, full
	
* Drop outliers
	sum search_nr, d
	replace search_nr =. if !inrange(search_nr , r(p1), r(p99))	
			
* Gen plots: avg daily number of searches per user, by fuel type	

	twoway (line search_nr date if  gas_type==1, cmissing(n)) ///
	(line search_nr date if gas_type==3, lp(dash) cmissing(n)), ///
		ytitle("Average daily search number per user", size(medium)) ///
		legend(lab(1 "Diesel") lab(2 "Gasoline") rows(1) rowgap(*.2) symx(*1) size(medsmall)) ///
		xtitle("") xlabel( `=td(02jan2015)' "2 Jan 2015"  `=td(01mar2015)' "1 Mar 2015"  `=td(01may2015)' "1 May 2015" `=td(01oct2015)' "1 Oct 2015"  `=td(15nov2015)' "15 Nov 2015", labs(medsmall)) ///
		ylabel(0(1)4, grid labs(medsmall) gmax gmin) bgcolor(white) ysize(7) xsize(15) scale(1.2) 	
		
	graph export "$output\Figure_B7_search_per_user_diesel_e5.pdf", replace	

*/		
	
	
// Erase data 
erase "$dta\consumer_search.dta"	

	
****************************************************************************************************
**        Figure 6: Evolution of Google searches for MPD-related search terms in Germany          **
****************************************************************************************************
	
		
// Gen Figure 6: Evolution of Google searches for MPD-related search terms in Germany

* Load 'Benzinpreisvergleich' topic search data (Google trends) for Germany

	import delimited "$raw\02_Other_Datasets\Google Trends_multiTimeline.csv", delimiter(comma) varnames(2) encoding(UTF-8) rowrange(1) clear 
	
* Format date variable (weekly data)	
	gen date = date(woche, "YMD")
	format date %td
	
* Destring
	destring tankstellenpreisvergleichdeutsch, force gen(tpv)
	destring markttransparenzstellefürkraftst, force gen(mtu)
		
* Gen 'Benzinpreisvergleich' topic search graph: Jan 2013 - Dec 2014	
	twoway line tpv date if date>d(01jan2013) & date<d(31dec2014),  lp(dash) lw(medthick)  || ///
		line mtu date if date>d(01jan2013) & date<d(31dec2014),   lp(dash_dot) lw(medthick) || ///
		line benzinpreisappdeutschland date if date>d(01jan2013) & date<d(31dec2014),  lp(longdash) lw(medthick) || ///
		line tankstellenappdeutschland date if date>d(01jan2013) & date<d(31dec2014), lp(shortdash) lw(medthick) || ///
		line benzinpreisvergleichdeutschland date if date>d(01jan2013) & date<d(31dec2014),  lw(medthick) ///
		ytitle("Search index", size(medium)) xtitle("") ///
		legend(lab (1 "Tankstellen Preisvergleich") lab(2 "Markttransparenzstelle für Kraftstoffe") lab(3 "Benzinpreis App") lab(4 "Tankstellen App") lab(5 "Benzinpreisvergleich") rows(3) rowgap(*.2) symx(*.9) size(medsmall)) ///
		xline(`=d(12sep2013)', lc(gs0) lw(thin)) /*xline(`=d(01dec2013)', lc(gs0) lw(medium) lp(dash))*/ ///
		xlabel( `=d(01jan2013)' "1 Jan 2013"  `=d(01jul2013)' "1 Jul 2013" `=d(01jan2014)' "1 Jan 2014" `=d(01jul2014)' "1 Jul 2014", labs(medsmall)) ylabel(, labs(medsmall)) /*ysize(7) xsize(12)  scale(1)*/		
		
	graph export "$output\Figure_6_multiple keywords_search_color.pdf", replace 	
	

	
* B/W Format for JPE Micro
	twoway line tpv date if date>d(01jan2013) & date<d(31dec2014),  lp(dash) lw(medthick) lcolor(black)  || ///
		line mtu date if date>d(01jan2013) & date<d(31dec2014),   lp(dash_dot) lw(medthick) lcolor(gs0) || ///
		line benzinpreisappdeutschland date if date>d(01jan2013) & date<d(31dec2014),  lp(longdash) lw(medthick)  lcolor(gs2) || ///
		line tankstellenappdeutschland date if date>d(01jan2013) & date<d(31dec2014), lp(shortdash) lw(medthick)  lcolor(gs4) || ///
		line benzinpreisvergleichdeutschland date if date>d(01jan2013) & date<d(31dec2014),  lw(medthick)  lcolor(gs6) ///
		ytitle("Search index") xtitle("") ///
		legend(lab (1 "Tankstellen Preisvergleich") lab(2 "Markttransparenzstelle für Kraftstoffe") lab(3 "Benzinpreis App") lab(4 "Tankstellen App") lab(5 "Benzinpreisvergleich") rows(3) rowgap(*.2) symx(*.9) ) ///
		xline(`=d(12sep2013)', lc(gs0) lw(thin)) /*xline(`=d(01dec2013)', lc(gs0) lw(medium) lp(dash))*/ ///
		xlabel( `=d(01jan2013)' "1 Jan 2013"  `=d(01jul2013)' "1 Jul 2013" `=d(01jan2014)' "1 Jan 2014" `=d(01jul2014)' "1 Jul 2014")  ysize(6) xsize(9) scale(1) 
		
	graph export "$output\Figure_6_multiple keywords_search_bw.pdf", replace 		
	
	
		
********************************************************************************
**** Figure 7: Page views of Clever Tanken, Mehr Tanken and T-mobile Tanken ****
********************************************************************************

	// Load data
		cd  "$raw"
		import excel "02_Other_Datasets\Nutzungszahlen_apps.xlsx", cellrange(A1:E25) firstrow clear
		
		
	// Divide views by 1 Million
	gen Total_m=Total/1000000
	gen Clever_Tanken_m=Clever_Tanken/1000000
	gen Mehr_Tanken_m=Mehr_Tanken/1000000
	gen TMobile_Tanken_m=TMobile_Tanken/1000000	
	
	// Add empty obs for 01.15 (for graph)
	set obs `=_N+1'
	replace Month = 20075 if Month==.
	
	* Delete 2013 data as 0 for all obs
	keep if Month>=date("20140401","YMD")
	
		
	// Figure 7: Monthly page impressions
	
		twoway (line Clever_Tanken_m Month,  lwidth(thick) lpattern(longdash) color(gray*1) cmissing(n)) ///
			(line Mehr_Tanken_m Month, lwidth(thick) lpattern(shortdash) color(red*1) cmissing(n)) ///
			(line TMobile_Tanken_m Month, lwidth(thick) lpattern(longdash_dot) color(midgreen*1.25) cmissing(n)) ///
			, title("")  ytitle("Page impressions (millions)",) legend(pos(11) ring(0) label(1 "Clever Tanken") label(2 "Mehr Tanken") label(3 "T-Mobile Tanken") rows(4)) ///
			xtitle("") xlabel(`=d(01april2014)' "April 2014" `=d(01june2014)' "June 2014" `=d(01aug2014)' "August 2014" `=d(01oct2014)' /// 
			"October 2014" `=d(01dec2014)' "December 2014" ,) ///
			ylabel(0(5)35, labs(small) nogrid) bgcolor(white) ysize(4) xsize(7) yscale(range(0 0.75)) scale(1.2)		
			
		graph export "$output\Figure_7_app_views_all_apps_color.pdf" , replace

	
* B/W Format for JPE Micro
		twoway (line Clever_Tanken_m Month,  lwidth(thick) lpattern(longdash) color(black) cmissing(n)) ///
			(line Mehr_Tanken_m Month, lwidth(thick) lpattern(shortdash) color(gs0) cmissing(n)) ///
			(line TMobile_Tanken_m Month, lwidth(thick) lpattern(longdash_dot) color(gs2) cmissing(n)) ///
			, title("")  ytitle("Page impressions (millions)",) legend(pos(11) ring(0) label(1 "Clever Tanken") label(2 "Mehr Tanken") label(3 "T-Mobile Tanken") rows(4)) ///
			xtitle("") xlabel(`=d(01april2014)' "April 2014" `=d(01june2014)' "June 2014" `=d(01aug2014)' "August 2014" `=d(01oct2014)' /// 
			"October 2014" `=d(01dec2014)' "December 2014" ,) ///
			ylabel(0(5)35, nogrid) bgcolor(white) ysize(6) xsize(9) yscale(range(0 0.75)) scale(1) 
		
		graph export "$output\Figure_7_app_views_all_apps_bw.pdf" , replace
	
	